Overview

Exploration of all the data collected on the presence points + randomly generated background points

A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. I don’t know what I’m doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with ‘r getRversion()’.

Package dependencies

You can load them using the following code which uses a function called ipak. Note this function checks to see if the packages are installed first. The “include=FALSE” supresses the package installation text appearing in the document…

presence points

Read in the presence points

presence <- read.csv("../output/bio/presence_points_without_envdata_relooped_glbathy.csv", header = TRUE)
head(presence)

Update: Who sampled in which year and which month:

year

inst_by_yr <- with(presence, table(year, institutioncode))
write.csv(inst_by_yr, file = "../output/bio/institutioncode_by_yr.csv")
inst_by_yr
      institutioncode
year   ARC DFOCENARC DFOGulf DFOISDM DFOMTMS DFONL DFOQC MaineDMR NEFSC ROM
  1998   1         0      48       1      21   582     3        0     1   0
  1999   0         0      89       1      74   503     0        0     0   0
  2000  10         0      98       7      58   532     0        0     0   0
  2001   0         0      50       1      44   546     0        0     0   0
  2002   7         0     103       1      52   493     0        0     0   0
  2003   0         0      42       3      42   573     0        0     0   0
  2004   0         0      90       2      24   668   177        0     0   0
  2005   1        14     108       0      83   472   216        7     0   0
  2006   4        11      78       4      80   480   234        6     0   0
  2007   1        42      85       1      37   467   233        0     0   2
  2008   0        10      81       5      38   477   263        0     0   0
  2009   0        10      78       0      30   511   125        3     0   0
  2010   1        10      94       0      44   529   109        0     0   0
  2011   0        22      77       0      13   479   139        0     0   0
  2012   0         8      85       0      10     0   148        0     0   0
  2013   2        18      68       0      21     0   127        0     0   0
  2014   1        22     101       0       6     0     0        0     0   0
  2015   3        12       0       0       0     0     0        0     0   0

month

inst_by_mt <- with(presence, table(month, institutioncode))
write.csv(inst_by_mt, file = "../output/bio/institutioncode_by_mth.csv")
inst_by_mt
     institutioncode
month  ARC DFOCENARC DFOGulf DFOISDM DFOMTMS DFONL DFOQC MaineDMR NEFSC  ROM
   1     0         0       0       0       0   263     0        0     0    0
   2     0         0       0       0       7     4     0        0     0    0
   3     1         0       0       3     342     0     0        0     0    0
   4     0         0       0       3      11   732     0        0     1    0
   5     1         0       0       0       0  1106    17        8     0    0
   6     1         0       0       0       0  1694    24        3     0    0
   7    20        19       0      17     304    52    33        0     0    2
   8     1        73       7       1      14     0  1735        0     0    0
   9     8        23    1380       3       0     6    14        0     0    0
   10    0        63       5       3       0   545     2        2     0    0
   11    8         1       0       3       0  1811     0        3     0    0
   12    0         0       0       1       0  1136     0        0     0    0

environmental correlates

  1. Get the max, min, an mean values and add into a dataframe

Temperature

temp_depth_max <- max(presence$temp_depth, na.rm = TRUE)
temp_depth_min <- min(presence$temp_depth, na.rm = TRUE)
temp_depth_mean <- mean(presence$temp_depth, na.rm = TRUE)
temp_surface_max <- max(presence$temp_surface, na.rm = TRUE)
temp_surface_min <- min(presence$temp_surface, na.rm = TRUE)
temp_surface_mean <- mean(presence$temp_surface, na.rm = TRUE)

Salinity

sal_depth_max <- max(presence$salinity_depth, na.rm = TRUE)
sal_depth_min <- min(presence$salinity_depth, na.rm = TRUE)
sal_depth_mean <- mean(presence$salinity_depth, na.rm = TRUE)
sal_surface_max <- max(presence$salinity_surface, na.rm = TRUE)
sal_surface_min <- min(presence$salinity_surface, na.rm = TRUE)
sal_surface_mean <- mean(presence$salinity_surface, na.rm = TRUE)

Chl

chl_depth_max <- max(presence$chl_depth, na.rm = TRUE)
chl_depth_min <- min(presence$chl_depth, na.rm = TRUE)
chl_depth_mean <- mean(presence$chl_depth, na.rm = TRUE)
chl_surface_max <- max(presence$chl_surface, na.rm = TRUE)
chl_surface_min <- min(presence$chl_surface, na.rm = TRUE)
chl_surface_mean <- mean(presence$chl_surface, na.rm = TRUE)

O2

o2_depth_max <- max(presence$o2_depth, na.rm = TRUE)
o2_depth_min <- min(presence$o2_depth, na.rm = TRUE)
o2_depth_mean <- mean(presence$o2_depth, na.rm = TRUE)
o2_surface_max <- max(presence$o2_surface, na.rm = TRUE)
o2_surface_min <- min(presence$o2_surface, na.rm = TRUE)
o2_surface_mean <- mean(presence$o2_surface, na.rm = TRUE)

MLP

mlp_surface_max <- max(presence$mlp_surface, na.rm = TRUE)
mlp_surface_min <- min(presence$mlp_surface, na.rm = TRUE)
mlp_surface_mean <- mean(presence$mlp_surface, na.rm = TRUE)

SSH

ssh_surface_max <- max(presence$ssh_surface, na.rm = TRUE)
ssh_surface_min <- min(presence$ssh_surface, na.rm = TRUE)
ssh_surface_mean <- mean(presence$ssh_surface, na.rm = TRUE)

create matrix

td <- c(temp_depth_max, temp_depth_min, temp_depth_mean)
ts <- c(temp_surface_max, temp_surface_min, temp_surface_mean)
sd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
ss <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
cd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
cs <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
od <- c(o2_depth_max, o2_depth_min, o2_depth_mean)
os <- c(o2_surface_max, o2_surface_min, o2_surface_mean)
mlp <- c(mlp_surface_max, mlp_surface_min, mlp_surface_mean)
ssh <- c(ssh_surface_max, ssh_surface_min, ssh_surface_mean)
env_stats <- rbind(td, ts, sd, ss, cd, cs, od, os, mlp, ssh)
row.names(env_stats) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats) <- c("Max", "Min", "Mean")
write.csv(env_stats, "../output/env/env_correlates_basic_stats.csv", row.names = TRUE)
env_stats
                         Max         Min        Mean
Temp Depth       289.0834045 271.2114868 274.9217164
Temp Surface     293.3608093 271.6101074 280.4476878
Salinity Depth    35.5045395  22.1449070  33.4295117
Salinity Surface  34.3667259  13.5929441  30.2518319
Chl Depth         35.5045395  22.1449070  33.4295117
Chl Surface       34.3667259  13.5929441  30.2518319
Oxygen Depth     377.2959595   0.9998450 279.6701444
Oxygen Surface   385.2262878 238.3178253 315.1620514
MLP              108.2339201  10.6823719  18.1911518
SSH               -0.2825949  -0.8503166  -0.5334292

Hmm some potentially suspicious values here in - Oxygen Depth min - mlp Max and maybe also - Salinity Depth min - Salinity Surface min - CHL depth min - CHL depth max

lets just see where these values appear and check the netcdf layers. Note I will check these values in cygwin using the cdo operator infov on the netcdfs (to ensure there was no issue with processing in R).

get the year and month these values appeared in

o2d_check_yr <- subset(presence$year, presence$o2_depth == o2_depth_min)
o2d_check_mth <- subset(presence$month, presence$o2_depth == o2_depth_min)

O2 depth min is in 2005_08

And value seems correct

mlp_check_yr <- subset(presence$year, presence$mlp_surface == mlp_surface_max)
mlp_check_mth <- subset(presence$month, presence$mlp_surface == mlp_surface_max)

mlp max in in 2007_12

And yes the value seems correct

sald_check_yr <- subset(presence$year, presence$salinity_depth == sal_depth_max)
sald_check_mth <- subset(presence$month, presence$salinity_depth == sal_depth_max)

year month 2001_05

also seems correct

sals_check_yr <- subset(presence$year, presence$salinity_surface == sal_surface_min)
sals_check_mth <- subset(presence$month, presence$salinity_surface == sal_surface_min)

year month 21998_06

ok again…

Leave the rest

simple plots of env. correlates

temperature

par(mfrow=c(1,2))
plot(presence$temp_depth, main = "Temp at Depth (Various)", ylab = "Kelvin")
plot(presence$temp_surface, main = "Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

Hmm not necessarily very helpful. But anyway, lets carry on

salinity

par(mfrow=c(1,2))
plot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(presence$salinity_surface, main = "Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

Chlorophyll

par(mfrow=c(1,2))
plot(presence$chl_depth, main = "Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$chl_surface, main = "Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

O2

par(mfrow=c(1,2))
plot(presence$o2_depth, main = "Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$o2_surface, main = "Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

mlp

plot(presence$mlp_surface, main = "Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

SSH

plot(presence$ssh_surface, main = "Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

XXX SAM - YOU WANT TO THINK ABOUT DOING THIS BY DEPTH RATHER THAN ALL DEPTHS)

unique_depths <- unique(presence$depthlayerno)
unique_depthdordered <- sort(unique_depths) #just puts the list in oder with no NA
length(unique_depthdordered)
[1] 39

ouch - 39 layers… a job for a boxplot probably

frequency plots of env. corr

temperature

hist(presence$temp_surface, main = "Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(presence$temp_dept, main = "Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

chl

hist(presence$chl_surface, main = "Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(presence$chl_depth, main = "Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

salinity

hist(presence$salinity_surface, main = "Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(presence$salinity_depth, main = "Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

oxygen

hist(presence$o2_surface, main = "Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(presence$o2_depth, main = "Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

MLP

hist(presence$mlp, main = "Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ssh

hist(presence$ssh, main = "Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)

individual plots of each variable

par(mfrow=c(1,2))
boxplot(presence$chl_depth, main = "Chlorphyll at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$chl_surface, main = "Chlorphyll at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/chl_boxplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$temp_depth ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$chl_depth ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$salinity_depth ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$o2_depth ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$temp_depth ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$chl_depth ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$salinity_depth ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$o2_depth ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$mlp ~ presence$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Year")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$ssh ~ presence$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Year")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$temp_surface ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$chl_surface ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$salinity_surface ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$o2_surface ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$temp_surface ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$chl_surface ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$salinity_surface ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(presence$o2_surface ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

background points

Read in the presence points

background <- read.csv("../output/bio/background_complete_obs_cels_globot.csv", header = TRUE)
head(background)

environmental correlates

  1. Get the max, min, an mean values and add into a dataframe

Temperature

temp_depth_max_back <- max(background$temp_depth, na.rm = TRUE)
temp_depth_min_back <- min(background$temp_depth, na.rm = TRUE)
temp_depth_mean_back <- mean(background$temp_depth, na.rm = TRUE)
temp_surface_max_back <- max(background$temp_surface, na.rm = TRUE)
temp_surface_min_back <- min(background$temp_surface, na.rm = TRUE)
temp_surface_mean_back <- mean(background$temp_surface, na.rm = TRUE)

Salinity

sal_depth_max_back <- max(background$salinity_depth, na.rm = TRUE)
sal_depth_min_back <- min(background$salinity_depth, na.rm = TRUE)
sal_depth_mean_back <- mean(background$salinity_depth, na.rm = TRUE)
sal_surface_max_back <- max(background$salinity_surface, na.rm = TRUE)
sal_surface_min_back <- min(background$salinity_surface, na.rm = TRUE)
sal_surface_mean_back <- mean(background$salinity_surface, na.rm = TRUE)

Chl

chl_depth_max_back <- max(background$chl_depth, na.rm = TRUE)
chl_depth_min_back <- min(background$chl_depth, na.rm = TRUE)
chl_depth_mean_back <- mean(background$chl_depth, na.rm = TRUE)
chl_surface_max_back <- max(background$chl_surface, na.rm = TRUE)
chl_surface_min_back <- min(background$chl_surface, na.rm = TRUE)
chl_surface_mean_back <- mean(background$chl_surface, na.rm = TRUE)

O2

o2_depth_max_back <- max(background$o2_depth, na.rm = TRUE)
o2_depth_min_back <- min(background$o2_depth, na.rm = TRUE)
o2_depth_mean_back <- mean(background$o2_depth, na.rm = TRUE)
o2_surface_max_back <- max(background$o2_surface, na.rm = TRUE)
o2_surface_min_back <- min(background$o2_surface, na.rm = TRUE)
o2_surface_mean_back <- mean(background$o2_surface, na.rm = TRUE)

MLP

mlp_surface_max_back <- max(background$mlp_surface, na.rm = TRUE)
mlp_surface_min_back <- min(background$mlp_surface, na.rm = TRUE)
mlp_surface_mean_back <- mean(background$mlp_surface, na.rm = TRUE)

SSH

ssh_surface_max_back <- max(background$ssh_surface, na.rm = TRUE)
ssh_surface_min_back <- min(background$ssh_surface, na.rm = TRUE)
ssh_surface_mean_back <- mean(background$ssh_surface, na.rm = TRUE)

create matrix

tdb <- c(temp_depth_max_back, temp_depth_min_back, temp_depth_mean_back)
tsb <- c(temp_surface_max_back, temp_surface_min_back, temp_surface_mean_back)
sdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
ssb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
cdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
csb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
odb <- c(o2_depth_max_back, o2_depth_min_back, o2_depth_mean_back)
osb <- c(o2_surface_max_back, o2_surface_min_back, o2_surface_mean_back)
mlpb <- c(mlp_surface_max_back, mlp_surface_min_back, mlp_surface_mean_back)
sshb <- c(ssh_surface_max_back, ssh_surface_min_back, ssh_surface_mean_back)
env_stats_back <- rbind(tdb, tsb, sdb, ssb, cdb, csb, odb, osb, mlpb, sshb)
row.names(env_stats_back) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats_back) <- c("Max", "Min", "Mean")
write.csv(env_stats_back, "../output/env/env_correlates_background_basic_stats.csv", row.names = TRUE)
env_stats_back
                          Max         Min        Mean
Temp Depth        298.0517000 270.2798000 277.2492004
Temp Surface      298.2931000 271.0403000 278.9040097
Salinity Depth     36.2885400  12.8384380  33.3354335
Salinity Surface   35.9200800  12.0846500  32.3867768
Chl Depth          36.2885400  12.8384380  33.3354335
Chl Surface        35.9200800  12.0846500  32.3867768
Oxygen Depth      460.5706177   0.8061745 301.0655723
Oxygen Surface    410.5555000 207.4744000 315.9223887
MLP              2266.5013152   8.5859600  37.7989169
SSH                -0.1283472  -1.2465040  -0.6931972

temperature

par(mfrow=c(1,2))
plot(background$temp_depth, main = "Background Points Temp at Depth (Various)", ylab = "Kelvin")
plot(background$temp_surface, main = "Background Points Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 

salinity

par(mfrow=c(1,2))
plot(background$salinity_depth, main = "Background Points Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(background$salinity_surface, main = "Background Points Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

Chlorophyll

par(mfrow=c(1,2))
plot(background$chl_depth, main = "Background Points Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$chl_surface, main = "Background Points Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

O2

par(mfrow=c(1,2))
plot(background$o2_depth, main = "Background Points Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$o2_surface, main = "Background Points Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

mlp

plot(background$mlp_surface, main = "Background Points Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

SSH

plot(background$ssh_surface, main = "Background Points Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

frequency plots of env. corr

temperature

hist(background$temp_surface, main = "Background Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(background$temp_dept, main = "Background Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

chl

hist(background$chl_surface, main = "Background Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(background$chl_depth, main = "Background Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

salinity

hist(background$salinity_surface, main = "Background Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(background$salinity_depth, main = "Background Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

oxygen

hist(background$o2_surface, main = "Background Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

hist(background$o2_depth, main = "Background Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

MLP

hist(background$mlp, main = "Background Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ssh

hist(background$ssh, main = "Background Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)

temperature

boxplot(background$temp_depth ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$chl_depth ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$salinity_depth ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$o2_depth ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$temp_depth ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$chl_depth ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$salinity_depth ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$o2_depth ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$mlp ~ background$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Year")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$ssh ~ background$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Year")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$temp_surface ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$chl_surface ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$salinity_surface ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$o2_surface ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$temp_surface ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$chl_surface ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$salinity_surface ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

boxplot(background$o2_surface ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

correlations between background points

check to see if there are any correlations in the env. variables for the background points

first subset the env.correlate columns (you don’t need everything)

background_env <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
background_env_cor <- cor(background_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_env_cor, "../output/env/background_env_corr.csv", row.names = TRUE)
background_corrplot <- corrplot(background_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envcorr.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)

par(mfrow=c(4,4))
for(i in 1:16){
  plot(density(background_env[,i], na.rm=T), main = names(background_env)[i])
}

PUT THE CHUNK OUTPUT BACK TO INLINE

add nafo region and gear type into the mix

first subset the env.correlate columns + bottom_depth (you don’t need everything)

background_envbotdepth <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
background_envbotdepth_cor <- cor(background_envbotdepth, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_envbotdepth_cor, "../output/env/background_envbotdepth_cor.csv", row.names = TRUE)
background_corrplot <- corrplot(background_envbotdepth_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envbotdepth_cor.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

correlations between presence points

check to see if there are any correlations in the env. variables

first subset the env.correlate columns (you don’t need everything) then use cor to get the correlation values, and then corrplot for a visual

pres_env <- subset(presence, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
pres_env_cor <- cor(pres_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(pres_env_cor, "../output/env/pres_env_corr.csv", row.names = TRUE)
pres_corrplot <- corrplot(pres_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/pres_envcorr.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)

graphics.off()
tiff("../output/env/simple_plots/background_envcorr_density.tiff") # to automatically save the plot to a png AND show it inline
par(mfrow=c(4,4), mar=c(1,1,1,1))
for(i in 1:16){
  plot(density(pres_env[,i], na.rm=T), main = names(pres_env)[i])
}
dev.off() # stops automatic saving of the plot to a png
null device 
          1 

PUT THE CHUNK OUTPUT BACK TO INLINE

density plot with background and presence env. data

Inspired by Merrow 2013 - top paragraph of page 1063 (are the species observations uniformly distributed over the background, or are they skewed)

ggplot(pres_env, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/mlp_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(pres_env, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/ssh_backvspres.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

NAFO Regions

compare the environmental correlates between different NAFO regions

first see which nafo zones were sampled in each year

nafo_by_yr <- with(presence, table(year, nafo_zone))
write.csv(nafo_by_yr, file = "../output/bio/nafozone_by_yr.csv")
nafo_by_yr
      nafo_zone
year    0A  0B  1C  2G  2H  2J  3K  3L  3M  3N  3O 3Pn 3Ps  4R  4S  4T 4Vn 4Vs  4W  4X  5Y 5Ze HudsonStrait
  1998   0   0   0   0  12  56 113 230   0  44  69   3  58   0   0  70   9   7   4   3   1   0            0
  1999   0   0   0   0   0  35 103 212   0  45  50   2  60   0   0  87  11  35  28   2   2   0            0
  2000   0   0   0   0   0  55 116 238   2  52  53   4  48   0   0  99  12  26  20   1   0   0            0
  2001   0   0   0   0   3  35 126 214   0  52  56   2  61   0   0  52   6  28  11   0   0   0            0
  2002   0   0   0   0   0  36  71 214   0  50  62   4  60   0   0 110   8  28  16   1   0   0            0
  2003   0   0   0   0   0  50  66 232   0  63  76   3  89   0   0  44   8  20  15   1   0   0            0
  2004   0   0   0   0   8  60 209 194   0  58  66   7  69  40  70 160  10  11   5   1   0   0            0
  2005   0   7   0   7   0  49  92 198   0  37  54   1  42  50 112 166  27  23  30   4   7   0            0
  2006   3   4   0   3  14  49 175 178   0  25  26   0  19  28 132 148  11  39  31   9   7   1            0
  2007   0  13   0   3   0  36  84 173   0  56  57   1  62  28 122 172  10  19  11   0   1   0           26
  2008   3   7   0   0   6  49  82 162   0  46  57   0  77  64 129 157   5  18  13   3   0   2            0
  2009   0   0   0   1   0  45 100 172   0  54  67   8  66  18  63 130   7   1  23   0   3   0            9
  2010   0   9   1   0   6  34 111 207   0  62  63   3  50  16  56 134  12  18  14   0   0   0            0
  2011   0   9   0   1   5  36  80 161   0  70  51  12  64  32  58 127   7   6   2   0   0   0           12
  2012   0   7   0   1   0   0   0   0   0   0   0   0   0  34  71 133   5   1   3   1   0   0            0
  2013   0   3   0   6   1   0   0   2   0   0   0   0   0  27  58 113  15   2   4   0   0   0            8
  2014   1  16   0   1   0   0   0   1   0   0   0   0   0   0   0 101   6   0   0   0   0   0            4
  2015   0   8   0   1   0   0   0   2   0   0   0   0   1   0   0   0   0   0   0   0   0   0            3

and by month

nafo_by_mth <- with(presence, table(nafo_zone, month))
write.csv(nafo_by_mth, file = "../output/bio/nafozone_by_mth.csv")
nafo_by_mth
              month
nafo_zone         1    2    3    4    5    6    7    8    9   10   11   12
  0A              0    0    0    0    0    0    0    3    0    3    1    0
  0B              0    0    0    0    0    0    0   54   23    6    0    0
  1C              0    0    0    0    0    0    0    1    0    0    0    0
  2G              0    0    0    0    0    0   16    8    0    0    0    0
  2H              0    0    0    0    0    0    2    0    0   50    0    3
  2J              6    0    0    0    0    0    0    0    0  125  381  113
  3K            255    4    0    0    0    0    0    2    0    0  610  657
  3L              2    0    0    0  105 1410   83    1    0   64  764  361
  3M              0    0    0    0    0    0    0    0    0    2    0    0
  3N              0    0    0    0  308  254    0    0    0  110   40    2
  3O              0    0    0   28  522   31    1    0    6  194   24    1
  3Pn             0    0    0   42    8    0    0    0    0    0    0    0
  3Ps             0    0    0  662  163    0    0    0    0    0    1    0
  4R              0    0    0    0    0    0   18  318    1    0    0    0
  4S              0    0    0    0    2   10    5  852    2    0    0    0
  4T              0    0    0    0   15   14   10  570 1387    7    0    0
  4Vn             0    0    0    1    0    0  145    8   15    0    0    0
  4Vs             0    0  178    2    0    0   99    3    0    0    0    0
  4W              0    6  163   11    0    0   44    4    0    1    1    0
  4X              0    0    3    0    0    0   20    0    0    2    1    0
  5Y              0    0    0    1    9    3    3    0    0    2    3    0
  5Ze             0    1    2    0    0    0    0    0    0    0    0    0
  HudsonStrait    0    0    0    0    0    0    1    7    0   54    0    0

Interesting that there is a point in 1C - this is outside Canadian waters…. anyway

density plot with background and presence env. data by NAFO region

What I want to see if if there is a marked difference between the env. correlates of the presence points between NAFO regions. This is also to try deal with sampling bias (b/c the whole region is not uniformly sampled in each month, but rather nafo regions have a strong month bias)

first create NAFO-region datasets

nafo0a <- subset(presence, nafo_zone == "0A")
nafo0b <- subset(presence, nafo_zone == "0B")
nafo1c <- subset(presence, nafo_zone == "1C")
nafo2g <- subset(presence, nafo_zone == "2G")
nafo2h <- subset(presence, nafo_zone == "2H")
nafo2j <- subset(presence, nafo_zone == "2J")
nafo3k <- subset(presence, nafo_zone == "3K")
nafo3l <- subset(presence, nafo_zone == "3L")
nafo3m <- subset(presence, nafo_zone == "3M")
nafo3n <- subset(presence, nafo_zone == "3N")
nafo3o <- subset(presence, nafo_zone == "3O")
nafo3ps <- subset(presence, nafo_zone == "3Ps")
nafo4r <- subset(presence, nafo_zone == "4R")
nafo4s <- subset(presence, nafo_zone == "4S")
nafo4t <- subset(presence, nafo_zone == "4T")
nafo4vn <- subset(presence, nafo_zone == "4Vn")
nafo4vs <- subset(presence, nafo_zone == "4Vs")
nafo4w <- subset(presence, nafo_zone == "4W")
nafo4x <- subset(presence, nafo_zone == "4X")
nafo5Y <- subset(presence, nafo_zone == "5Ze")
nafohudson <- subset(presence, nafo_zone == "HudsonStrait")

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)

ggplot(nafo0a, aes(x = o2_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_depth_nafo.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

Let’s plot the variables by nafo region/year then by month

pr98 <- subset(presence, year == "1998")
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
pr98 <- subset(presence, year == "1998")
pr11 <- subset(presence, year == "2011")
par(mfrow=c(2,1))
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
boxplot(pr11$temp_depth ~ pr11$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")

ok scrap the nafo region analysis - it is too mixed up with month (so seeing if region or month is a factor that needs to be factored in to the model is not appropriate)

remap who sampled

Now lets check the number of records and spatial-temporal distribution of the observations by institution code to make sure none are dodgy

first a table of how many observations each instituioncode has

obs_by_ins <- count(presence, "institutioncode")
obs_by_ins
write.csv(obs_by_ins, file = "../output/bio/samplinginstitutions/no_observations_institutioncode.csv")

ok so IMR, NAFO, ICES, & USM have very few entries (1, 1, 1, and 3 respectively)

Lets take a look at the spatial breakdown of these institutions.First all points…

map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(38, 70), asp = 1, main = "All Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(presence$decimalLongitude, presence$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/all_occurrences.png") #this prints a png of the plot
dev.off() #this turns off the print command

Note there is one point up by iceland that you should get rid of (Icelandic population thought to be seperate from Labrador, but it is unclear if this is true or not).

Map the institutioncode == ARC only data…

ARC_obs <- presence[presence$institutioncode == "ARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Arc Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ARC_obs$decimalLongitude, ARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOCENARC_obs <- presence[presence$institutioncode == "DFOCENARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Central & Arctic Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOCENARC_obs$decimalLongitude, DFOCENARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOCENARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOGulf_obs <- presence[presence$institutioncode == "DFOGulf", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Gulf Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOGulf_obs$decimalLongitude, DFOGulf_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOGulf_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOISDM_obs <- presence[presence$institutioncode == "DFOISDM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO ISDM Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOISDM_obs$decimalLongitude, DFOISDM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOISDM_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOMTMS_obs <- presence[presence$institutioncode == "DFOMTMS", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Maritimes Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOMTMS_obs$decimalLongitude, DFOMTMS_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOMTMS_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFONL_obs <- presence[presence$institutioncode == "DFONL", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Newfouundland & Labrador Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFONL_obs$decimalLongitude, DFONL_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFONL_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOQC_obs <- presence[presence$institutioncode == "DFOQC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Quebec Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOQC_obs$decimalLongitude, DFOQC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOQC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

MaineDMR_obs <- presence[presence$institutioncode == "MaineDMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Maine DMR Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(MaineDMR_obs$decimalLongitude, MaineDMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/MaineDMR_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

NEFSC_obs <- presence[presence$institutioncode == "NEFSC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "NEFSC Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NEFSC_obs$decimalLongitude, NEFSC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/NEFSC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

ROM_obs <- presence[presence$institutioncode == "ROM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Royal Ontario Museum Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ROM_occurrences_all.png") #this prints a png of the plot

check for gear type

what are the unique gear types you have in your presence data, and how many?

so the vast majority are trawls of some type.

map the gear usage in Arcgis (gear_type_map)

create a table of gear use by month

gearby_mth
     gear
month bottom_trawl bottom_trawl_alfredo_03 bottom_trawl_campelen_14 bottom_trawl_campelen_1800 bottom_trawl_campelen_21 bottom_trawl_cosmos_2600 bottom_trawl_western_IIA unknown
   1             0                       0                        0                        263                        0                        0                        0       0
   2             7                       0                        0                          4                        0                        0                        0       0
   3           342                       0                        0                          0                        0                        0                        0       1
   4            12                       0                        0                        732                        0                        0                        0       0
   5             8                       0                        0                       1106                        0                        0                        0      18
   6             3                       0                        0                       1694                        0                        0                        0      25
   7           304                       0                       18                         64                        1                        0                        0      43
   8            14                       0                       34                       1193                       36                        3                        7     543
   9             0                       0                        0                         12                       23                        0                     1380      16
   10            2                       3                        0                        545                        0                       60                        5       2
   11            3                       0                        0                       1811                        0                        1                        0       8
   12            0                       0                        0                       1136                        0                        0                        0       0
     gear
month vertical_plankton_tow
   1                      0
   2                      0
   3                      3
   4                      3
   5                      0
   6                      0
   7                     17
   8                      1
   9                      3
   10                     3
   11                     3
   12                     1

What I want to see if if there is a marked difference between the env. correlates of the presence points between gear type used. This is also to try deal with detection bias between gear type (b/c the whole region is not uniformly sampled by the same gear type)

first create gear datasets

presence$gear <- as.character(presence$gear)
bottom_trawl <- subset(presence, gear == "bottom_trawl")
bottom_trawl_alfredo_03 <- subset(presence, gear == "bottom_trawl_alfredo_03")
bottom_trawl_campelen_14 <- subset(presence, gear == "bottom_trawl_campelen_14")
bottom_trawl_campelen_1800 <- subset(presence, gear == "bottom_trawl_campelen_1800")
bottom_trawl_campelen_21 <- subset(presence, gear == "bottom_trawl_campelen_21")
bottom_trawl_cosmos_2600 <- subset(presence, gear == "bottom_trawl_cosmos_2600")
bottom_trawl_western_IIA <- subset(presence, gear == "bottom_trawl_western_IIA")
unknown <- subset(presence, gear == "unknown")
vertical_plankton_tow <- subset(presence, gear == "vertical_plankton_tow")

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)

ggplot(bottom_trawl, aes(x = o2_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_depth_gear.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

par(mar=c(7,5,1,1))
boxplot(presence$temp_depth ~ presence$gear, ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth by gear type", las = 2)
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

looks like not much difference… what about a kruskal wallace test?

kruskal.test(presence$temp_depth ~ presence$gear)

Ok so there is a statistically sig difference somewhere in the temp at depth reported by the gear type (Kruskal-Wallis chi-squared = 248.27, df = 7, p-value < 2.2e-16)

To see where the difference(s) are run a Dunn test Zar (2010) states that the Dunn test (in the FSA package) is appropriate for groups with unequal numbers of observations.(Zar, J.H. 2010. Biostatistical Analysis, 5th ed. Pearson Prentice Hall: Upper Saddle River, NJ.) http://rcompanion.org/rcompanion/d_06.html

SAM WHEN DUNN IS INSTALLED, GO TO RCOMPANIONS.ORG AND LOOK FOR DUNN - CHECK CHROME HISTORY (PROBABLY UNDER KRUSKAL WALLIS) ALSO MAP THE DISTRIBUTION OF THESE GEAR TYPES

pairwise.wilcox.test(presence$temp_depth, presence$gear, p.adj='bonferroni', exact=F)

dunn test

gear_temp_depthdunn <- dunnTest(presence$temp_depth, presence$gear, list = TRUE)
gear_temp_depthdunn



write.csv(table, "../output/env/gear_temp_depthdunn.csv", row.names = TRUE)

vif

for this you need the joined dataset. this was created under mergig_datasets.src and the file is ../output/bio/presab.csv

presab <- read.csv("../output/bio/presab.csv", header = TRUE)
vif_allpresab <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allpresab, "../output/bio/vif_allpresab.csv", row.names = TRUE)
vif_allpresab

interpret - see https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif/96584#96584 To make GVIFs comparable across dimensions, we suggested using GVIF^(1/(2Df)), where Df is the number of coefficients in the subset (ref fox and motette 1992 in zotero) or the 2 continuous variables, GVIFˆ(1/(2Df)) (which is basically the square root of the VIF/GVIF value as DF=1) is the proportional change of the standard error and confidence interval of their coefficients due to the level of collinearity. The GVIFˆ(1/(2Df)) value of the categorical variable is a similar measure for the reduction in precision of the coefficients’ estimation due to collinearity. apparently i just need to square GVIF^(1/(2Df)) and then use the normal VIF “rule of thumb”…

vif_allpresab_sq <- read.csv("../output/bio/vif_allpresab.csv", header = TRUE)
vif_allpresab_sq$GVIF2Dfsq <- vif_allpresab_sq$GVIF..1..2.Df..^2
write.csv(vif_allpresab_sq, "../output/bio/vif_allpresab_sq.csv", row.names = TRUE)
vif_allpresab_sq

As per SLR suggestion, rerun without gear

vif_allbutgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutgear, "../output/bio/vif_allbutgear.csv", row.names = TRUE)
vif_allbutgear

As per SLR suggestion, rerun without gear and most highly correlated variables

vif_allbutgearhighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutgearhighlycorr, "../output/bio/vif_allbutgearhighlycorr.csv", row.names = TRUE)
vif_allbutgearhighlycorr

As per SLR suggestion, rerun with gear but without most highly correlated variables

vif_allbuthighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter + gear, data = presab))
write.csv(vif_allbuthighlycorr, "../output/bio/vif_allbuthighlycorr.csv", row.names = TRUE)
vif_allbuthighlycorr

vif_allbuthighlycorr_sq <- read.csv("../output/bio/vif_allbuthighlycorr.csv", header = TRUE)
vif_allbuthighlycorr_sq$GVIF2Dfsq <- vif_allbuthighlycorr_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighlycorr_sq, "../output/bio/vif_allbuthighlycorr_sq.csv", row.names = TRUE)
vif_allbuthighlycorr_sq

ok remove one variable at a time - leave gear in

remove amo_prev

vif_allbutamo_prev <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prev, "../output/bio/vif_allbutamo_prev.csv", row.names = TRUE)
vif_allbutamo_prev

vif_allbutamo_prev_sq <- read.csv("../output/bio/vif_allbutamo_prev.csv", header = TRUE)
vif_allbutamo_prev_sq$GVIF2Dfsq <- vif_allbutamo_prev_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutamo_prev_sq, "../output/bio/vif_allbutamo_prev_sq.csv", row.names = TRUE)
vif_allbutamo_prev_sq

and leave gear out

remove amo_prev + gear

vif_allbutamo_prevgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prevgear, "../output/bio/vif_allbutamo_prevgear.csv", row.names = TRUE)
vif_allbutamo_prevgear

remove chl_depth

vif_allbutchl_depth <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depth, "../output/bio/vif_allbutchl_depth.csv", row.names = TRUE)
vif_allbutchl_depth

vif_allbutchl_depth_sq <- read.csv("../output/bio/vif_allbutchl_depth.csv", header = TRUE)
vif_allbutchl_depth_sq$GVIF2Dfsq <- vif_allbutchl_depth_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutchl_depth_sq, "../output/bio/vif_allbutchl_depth_sq.csv", row.names = TRUE)
vif_allbutchl_depth_sq

remove chl_depth and gear

vif_allbutchl_depthgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depthgear, "../output/bio/vif_allbutchl_depthgear.csv", row.names = TRUE)
vif_allbutchl_depthgear

remove ssh_surface

vif_allbutssh_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surface, "../output/bio/vif_allbutssh_surface.csv", row.names = TRUE)
vif_allbutssh_surface

vif_allbutssh_surface_sq <- read.csv("../output/bio/vif_allbutssh_surface.csv", header = TRUE)
vif_allbutssh_surface_sq$GVIF2Dfsq <- vif_allbutssh_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutssh_surface_sq, "../output/bio/vif_allbutssh_surface_sq.csv", row.names = TRUE)
vif_allbutssh_surface_sq

remove ssh_surface & gear

vif_allbutssh_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surfacegear, "../output/bio/vif_allbutssh_surfacegear.csv", row.names = TRUE)
vif_allbutssh_surfacegear

remove o2_surface

vif_allbuto2_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surface, "../output/bio/vif_allbuto2_surface.csv", row.names = TRUE)
vif_allbuto2_surface

vif_allbuto2_surface_sq <- read.csv("../output/bio/vif_allbuto2_surface.csv", header = TRUE)
vif_allbuto2_surface_sq$GVIF2Dfsq <- vif_allbuto2_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuto2_surface_sq, "../output/bio/vif_allbuto2_surface_sq.csv", row.names = TRUE)
vif_allbuto2_surface_sq

remove o2_surface & gear

vif_allbuto2_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surfacegear, "../output/bio/vif_allbuto2_surfacegear.csv", row.names = TRUE)
vif_allbuto2_surfacegear

as per SLR chat jan 23:

remove salinity_surface only

vif_allbutsalinitysurface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurface, "../output/bio/vif_allbutsalinitysurface.csv", row.names = TRUE)
vif_allbutsalinitysurface

vif_allbutsalinitysurface_sq <- read.csv("../output/bio/vif_allbutsalinitysurface.csv", header = TRUE)
vif_allbutsalinitysurface_sq$GVIF2Dfsq <- vif_allbutsalinitysurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutsalinitysurface_sq, "../output/bio/vif_allbutsalinitysurface_sq.csv", row.names = TRUE)
vif_allbutsalinitysurface_sq

and without gear

vif_allbutsalinitysurfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurfacegear, "../output/bio/vif_allbutsalinitysurfacegear.csv", row.names = TRUE)
vif_allbutsalinitysurfacegear

now remove temp_surface plus highly correlated

#with gear
vif_allbuthighcorrtempsurface <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurface, "../output/bio/vif_allbuthighcorrtempsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface

vif_allbuthighcorrtempsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsurface.csv", header = TRUE)
vif_allbuthighcorrtempsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsurface_sq, "../output/bio/vif_allbuthighcorrtempsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface_sq

#without gear
vif_allbuthighcorrtempsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurfacegear, "../output/bio/vif_allbuthighcorrtempsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsurfacegear

now remove temp_surface plus salinity_surface

#with gear
vif_allbuttempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurface, "../output/bio/vif_allbuttempsalsurface.csv", row.names = TRUE)
vif_allbuttempsalsurface

vif_allbuttempsalsurface_sq <- read.csv("../output/bio/vif_allbuttempsalsurface.csv", header = TRUE)
vif_allbuttempsalsurface_sq$GVIF2Dfsq <- vif_allbuttempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuttempsalsurface_sq, "../output/bio/vif_allbuttempsalsurface_sq.csv", row.names = TRUE)
vif_allbuttempsalsurface_sq

#without gear
vif_allbuttempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurfacegear, "../output/bio/vif_allbuttempsalsurfacegear.csv", row.names = TRUE)
vif_allbuttempsalsurfacegear

now remove temp_surface plus salinity_surface + highly correlated

#with gear
vif_allbuthighcorrtempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurface, "../output/bio/vif_allbuthighcorrtempsalsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface

vif_allbuthighcorrtempsalsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsalsurface.csv", header = TRUE)
vif_allbuthighcorrtempsalsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsalsurface_sq, "../output/bio/vif_allbuthighcorrtempsalsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface_sq

#without gear
vif_allbuthighcorrtempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurfacegear, "../output/bio/vif_allbuthighcorrtempsalsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurfacegear

Just out of curiosity, a vif will all but but atmos drivers

vif_allbutnaoamo <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear, data = presab))
write.csv(vif_allbutnaoamo, "../output/bio/vif_allbutnaoamo.csv", row.names = TRUE)
vif_allbutnaoamo

vif_allbutnaoamo_sq <- read.csv("../output/bio/vif_allbutnaoamo.csv", header = TRUE)
vif_allbutnaoamo_sq$GVIF2Dfsq <- vif_allbutnaoamo_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutnaoamo_sq, "../output/bio/vif_allbutnaoamo_sq.csv", row.names = TRUE)
vif_allbutnaoamo_sq

check to see how many cells have >1 point per timeslice

this needs to be 3D so you need to create a cell_id_3d

presab$cell_id_3d <- paste(presab$cell_id, presab$depthlayerno, sep="_") #create a new column that is a unique cell_id & depth ID. 
head(presab)

and now do one with a timeslice label

write.csv(presab, "../output/bio/../output/bio/presab_cellid_xyzt.csv", row.names = FALSE)
cannot open file '../output/bio/../output/bio/presab_cellid_xyzt.csv': No such file or directoryError in file(file, ifelse(append, "a", "w")) : 
  cannot open the connection

because background points can land in the same xyzt, subset the dataset to be only presences

presencedup <- subset(presab, occurrence == "1")
nrow(presencedup)
[1] 11393

the data is in presencedup$total_cell_obs_xyzt

unique_pointpercell <- unique(presencedup$total_cell_obs_xyzt)
unique_pointpercell
[1]  1  3  2  4  5  6 15

bugger… ok so lets do a frequency table

so there are 949 3d timesliced cells that contain more than one occurrence, and 2042 rows. - different sampling days - different coordinates - risk of duplicate entries (but only if two instituions inputted the same data with slightly different coordinates and different reserch program names - see cell3985_18_2000_7 below as example)

cell3985_18_2000_7  <- subset(celobsmore1, cell_id_xyzt == "3985_18_2000_7")
cell3985_18_2000_7 

Ideally you only want one observation cell cell_id_xyzt. First see if any are missing depth data

find the column numbers that correspond to the env. data (use fastmatch package as is quicker)

fmatch("chl_surface", names(celobsmore1)) #28
[1] 28
fmatch("chl_depth", names(celobsmore1)) #29
[1] 29
fmatch("mlp_surface", names(celobsmore1)) #30
[1] 30
fmatch("o2_surface", names(celobsmore1)) #31
[1] 31
fmatch("o2_depth", names(celobsmore1)) #32
[1] 32
fmatch("salinity_surface", names(celobsmore1)) #33
[1] 33
fmatch("salinity_depth", names(celobsmore1)) #34
[1] 34
fmatch("ssh_surface", names(celobsmore1)) #35
[1] 35
fmatch("temp_surface", names(celobsmore1)) #36
[1] 36
fmatch("temp_depth", names(celobsmore1)) #37
[1] 37

then subset any with NA values

celobsmore1_novals <-  celobsmore1[!complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_novals) #just to check how many there are - 1095 out of 2042
[1] 1095

ok and subset with all values

celobsmore1_allvals <- celobsmore1[complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_allvals) #947 out of 2042
[1] 947

ok so how many unique cells do we have with complete data…

pointpercellcount_alldata <- count(celobsmore1_allvals, "total_cell_obs_xyzt")
pointpercellcount_alldata$no_cells <- pointpercellcount_alldata$freq / pointpercellcount_alldata$total_cell_obs_xyzt
write.csv(pointpercellcount_alldata, file = "../output/bio/pointpercellcount_alldata.csv")
pointpercellcount_alldata

and see which cell_id_xyzt appear in both celobsmore1_allvals and celobsmore1_novals

celobsmore1_allnovals <- inner_join(celobsmore1_allvals, celobsmore1_novals)
Joining, by = c("cell_id", "year", "month", "depthlayerno", "id", "decimalLatitude", "decimalLongitude", "datecollected", "institutioncode", "individualcount", "depth", "resname", "originalscientificname", "collectioncode", "day", "occurrence", "nafo_zone", "gear", "longitude_meters", "latitude_meters", "amo_sample", "amo_prev", "amo_winter", "depth_layer", "bottom_depth", "total_cell_obs_xy", "total_cell_obs_xyt", "chl_surface", "chl_depth", "mlp_surface", "o2_surface", "o2_depth", "salinity_surface", "salinity_depth", "ssh_surface", "temp_surface", "temp_depth", "nao_sample", "nao_prev", "nao_winter", "total_cell_obs_xyzt", "temp_celsius_depth", "temp_celsius_surface", "bottom_depth_glorys", "cell_id_3d", "cell_id_xyzt")
celobsmore1_allnovals 

None…. which is good. I’d expect observations in the same xyzt to have the same values

so, I can successfully reduce the observations down to one per xyzt

obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno))
Error: unexpected ')' in "obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno))"

monthly spearmans correlations and VIF

curious - does correlations/vif alter between months?

First split the dataset into monthly

janpresab <- subset(presab, month == "1")
febpresab <- subset(presab, month == "2")
marpresab <- subset(presab, month == "3")
aprpresab <- subset(presab, month == "4")
maypresab <- subset(presab, month == "5")
junpresab <- subset(presab, month == "6")
julpresab <- subset(presab, month == "7")
augpresab <- subset(presab, month == "8")
seppresab <- subset(presab, month == "9")
octpresab <- subset(presab, month == "10")
novpresab <- subset(presab, month == "11")
decpresab <- subset(presab, month == "12")

now get the background points for each month (for spearmans)

janback <- subset(janpresab, occurrence == "0")
febback <- subset(febpresab, occurrence == "0")
marback <- subset(marpresab, occurrence == "0")
aprback <- subset(aprpresab, occurrence == "0")
mayback <- subset(maypresab, occurrence == "0")
junback <- subset(junpresab, occurrence == "0")
julback <- subset(julpresab, occurrence == "0")
augback <- subset(augpresab, occurrence == "0")
sepback <- subset(seppresab, occurrence == "0")
octback <- subset(octpresab, occurrence == "0")
novback <- subset(novpresab, occurrence == "0")
decback <- subset(decpresab, occurrence == "0")

run the correlations on each month’s background points

janback <- subset(janback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
janback_cor <- cor(janback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(janback_cor, "../output/env/janback_cor.csv", row.names = TRUE)

febback <- subset(febback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
febback_cor <- cor(febback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(febback_cor, "../output/env/febback_cor.csv", row.names = TRUE)

marback <- subset(marback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
marback_cor <- cor(marback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(marback_cor, "../output/env/marback_cor.csv", row.names = TRUE)

aprback <- subset(aprback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
aprback_cor <- cor(aprback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(aprback_cor, "../output/env/aprback_cor.csv", row.names = TRUE)

mayback <- subset(mayback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
mayback_cor <- cor(mayback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(mayback_cor, "../output/env/mayback_cor.csv", row.names = TRUE)

junback <- subset(junback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
junback_cor <- cor(junback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(junback_cor, "../output/env/junback_cor.csv", row.names = TRUE)

julback <- subset(julback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
julback_cor <- cor(julback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(julback_cor, "../output/env/julback_cor.csv", row.names = TRUE)

augback <- subset(augback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
augback_cor <- cor(augback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(augback_cor, "../output/env/augback_cor.csv", row.names = TRUE)

sepback <- subset(sepback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
sepback_cor <- cor(sepback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(sepback_cor, "../output/env/sepback_cor.csv", row.names = TRUE)

octback <- subset(octback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
octback_cor <- cor(octback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(octback_cor, "../output/env/octback_cor.csv", row.names = TRUE)

novback <- subset(novback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
novback_cor <- cor(novback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(novback_cor, "../output/env/novback_cor.csv", row.names = TRUE)

decback <- subset(decback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
decback_cor <- cor(decback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(decback_cor, "../output/env/decback_cor.csv", row.names = TRUE)

maxent?

library("raster")
library("dismo")
library("rgeos")
---
title: "Data Exploration"
author: "Samantha Andrews"
output: 
  html_notebook: 
    fig_height: 7
    fig_width: 10
editor_options: 
  chunk_output_type: inline
---

# Overview
Exploration of all the data collected on the presence points + randomly generated background points

A note to anyone who might happen to stumble across this... I am a beginner in R and have had no exposure to similar languages. I don't know what I'm doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with 'r getRversion()'.

# Package dependencies
You can load them using the following code which uses a function called [ipak](https://gist.github.com/stevenworthington/3178163). 
Note this function checks to see if the packages are installed first.
The "include=FALSE" supresses the package installation text appearing in the document...
```{r pre-install packages, include=FALSE}
packages <- c("plyr", "graphics", "ggplot2", "corrplot", "FSA", "car", "rworldmap", "fastmatch") 
source("../src/ipak.R")
ipak(packages)
```


# presence points
Read in the presence points

```{r}
presence <- read.csv("../output/bio/presence_points_without_envdata_relooped_glbathy.csv", header = TRUE)
head(presence)
```

## Update: Who sampled in which year and which month:

year
```{r}
inst_by_yr <- with(presence, table(year, institutioncode))
write.csv(inst_by_yr, file = "../output/bio/institutioncode_by_yr.csv")
inst_by_yr
```

month
```{r}
inst_by_mt <- with(presence, table(month, institutioncode))
write.csv(inst_by_mt, file = "../output/bio/institutioncode_by_mth.csv")
inst_by_mt
```

## environmental correlates

1) Get the max, min, an mean values and add into a dataframe

Temperature
```{r}
temp_depth_max <- max(presence$temp_depth, na.rm = TRUE)
temp_depth_min <- min(presence$temp_depth, na.rm = TRUE)
temp_depth_mean <- mean(presence$temp_depth, na.rm = TRUE)
temp_surface_max <- max(presence$temp_surface, na.rm = TRUE)
temp_surface_min <- min(presence$temp_surface, na.rm = TRUE)
temp_surface_mean <- mean(presence$temp_surface, na.rm = TRUE)
```

Salinity
```{r}
sal_depth_max <- max(presence$salinity_depth, na.rm = TRUE)
sal_depth_min <- min(presence$salinity_depth, na.rm = TRUE)
sal_depth_mean <- mean(presence$salinity_depth, na.rm = TRUE)
sal_surface_max <- max(presence$salinity_surface, na.rm = TRUE)
sal_surface_min <- min(presence$salinity_surface, na.rm = TRUE)
sal_surface_mean <- mean(presence$salinity_surface, na.rm = TRUE)
```

Chl
```{r}
chl_depth_max <- max(presence$chl_depth, na.rm = TRUE)
chl_depth_min <- min(presence$chl_depth, na.rm = TRUE)
chl_depth_mean <- mean(presence$chl_depth, na.rm = TRUE)
chl_surface_max <- max(presence$chl_surface, na.rm = TRUE)
chl_surface_min <- min(presence$chl_surface, na.rm = TRUE)
chl_surface_mean <- mean(presence$chl_surface, na.rm = TRUE)
```

O2
```{r}
o2_depth_max <- max(presence$o2_depth, na.rm = TRUE)
o2_depth_min <- min(presence$o2_depth, na.rm = TRUE)
o2_depth_mean <- mean(presence$o2_depth, na.rm = TRUE)
o2_surface_max <- max(presence$o2_surface, na.rm = TRUE)
o2_surface_min <- min(presence$o2_surface, na.rm = TRUE)
o2_surface_mean <- mean(presence$o2_surface, na.rm = TRUE)
```

MLP
```{r}
mlp_surface_max <- max(presence$mlp_surface, na.rm = TRUE)
mlp_surface_min <- min(presence$mlp_surface, na.rm = TRUE)
mlp_surface_mean <- mean(presence$mlp_surface, na.rm = TRUE)
```

SSH
```{r}
ssh_surface_max <- max(presence$ssh_surface, na.rm = TRUE)
ssh_surface_min <- min(presence$ssh_surface, na.rm = TRUE)
ssh_surface_mean <- mean(presence$ssh_surface, na.rm = TRUE)
```

create matrix
```{r}
td <- c(temp_depth_max, temp_depth_min, temp_depth_mean)
ts <- c(temp_surface_max, temp_surface_min, temp_surface_mean)
sd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
ss <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
cd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
cs <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
od <- c(o2_depth_max, o2_depth_min, o2_depth_mean)
os <- c(o2_surface_max, o2_surface_min, o2_surface_mean)
mlp <- c(mlp_surface_max, mlp_surface_min, mlp_surface_mean)
ssh <- c(ssh_surface_max, ssh_surface_min, ssh_surface_mean)
env_stats <- rbind(td, ts, sd, ss, cd, cs, od, os, mlp, ssh)
row.names(env_stats) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats) <- c("Max", "Min", "Mean")
write.csv(env_stats, "../output/env/env_correlates_basic_stats.csv", row.names = TRUE)
env_stats
```

Hmm some potentially suspicious values here in
- Oxygen Depth min
- mlp  Max
and maybe also
- Salinity Depth min
- Salinity Surface min
- CHL depth min
- CHL depth max

lets just see where these values appear and check the netcdf layers. Note I will check these values in cygwin using the cdo operator infov on the netcdfs (to ensure there was no issue with processing in R). 


get the year and month these values appeared in
```{r}
o2d_check_yr <- subset(presence$year, presence$o2_depth == o2_depth_min)
o2d_check_mth <- subset(presence$month, presence$o2_depth == o2_depth_min)
```
O2 depth min is in 2005_08

And value seems correct

```{r}
mlp_check_yr <- subset(presence$year, presence$mlp_surface == mlp_surface_max)
mlp_check_mth <- subset(presence$month, presence$mlp_surface == mlp_surface_max)
```
mlp max in in 2007_12

And yes the value seems correct

```{r}
sald_check_yr <- subset(presence$year, presence$salinity_depth == sal_depth_max)
sald_check_mth <- subset(presence$month, presence$salinity_depth == sal_depth_max)
```
year month 2001_05

also seems correct

```{r}
sals_check_yr <- subset(presence$year, presence$salinity_surface == sal_surface_min)
sals_check_mth <- subset(presence$month, presence$salinity_surface == sal_surface_min)
```
year month 21998_06

ok again...

Leave the rest


## simple plots of env. correlates

temperature
```{r}
par(mfrow=c(1,2))
plot(presence$temp_depth, main = "Temp at Depth (Various)", ylab = "Kelvin")
plot(presence$temp_surface, main = "Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```
Hmm not necessarily very helpful. But anyway, lets carry on

salinity
```{r}
par(mfrow=c(1,2))
plot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(presence$salinity_surface, main = "Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

Chlorophyll
```{r}
par(mfrow=c(1,2))
plot(presence$chl_depth, main = "Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$chl_surface, main = "Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

O2
```{r}
par(mfrow=c(1,2))
plot(presence$o2_depth, main = "Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$o2_surface, main = "Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

mlp
```{r}
plot(presence$mlp_surface, main = "Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

SSH
```{r}
plot(presence$ssh_surface, main = "Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

XXX SAM - YOU WANT TO THINK ABOUT DOING THIS BY DEPTH RATHER THAN ALL DEPTHS)

```{r}
unique_depths <- unique(presence$depthlayerno)
unique_depthdordered <- sort(unique_depths) #just puts the list in oder with no NA
length(unique_depthdordered)
```
ouch - 39 layers... a job for a boxplot probably

```{r}

```



# frequency plots of env. corr


temperature
```{r}
hist(presence$temp_surface, main = "Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$temp_dept, main = "Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

chl
```{r}
hist(presence$chl_surface, main = "Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$chl_depth, main = "Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

salinity
```{r}
hist(presence$salinity_surface, main = "Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$salinity_depth, main = "Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

oxygen
```{r}
hist(presence$o2_surface, main = "Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$o2_depth, main = "Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

MLP
```{r}
hist(presence$mlp, main = "Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

ssh
```{r}
hist(presence$ssh, main = "Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


## simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)



individual plots of each variable 
```{r}
par(mfrow=c(1,2))
boxplot(presence$temp_depth, main = "Temperature at Depth (Various)", ylab = "Kelvin")
boxplot(presence$temp_surface, main = "Temperature at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temp_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "PSU")
boxplot(presence$salinity_surface, main = "Salinity at Surface", ylab = "PSU")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$o2_depth, main = "O2 at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$o2_surface, main = "O2 at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/o2_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$chl_depth, main = "Chlorphyll at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$chl_surface, main = "Chlorphyll at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/chl_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$mlp_surface, main = "Density Mixed Layer Thickness", ylab = "meters")
boxplot(presence$ssh_surface, main = "Sea Surface Height", ylab = "meters")
dev.copy(png, "../output/env/simple_plots/mlp_ssh_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_depth ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_depth ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_depth ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_depth ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_depth ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_depth ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_depth ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_depth ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp ~ presence$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Year")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh ~ presence$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Year")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_surface ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_surface ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_surface ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_surface ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_surface ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_surface ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_surface ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_surface ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


# background points
Read in the presence points

```{r}
background <- read.csv("../output/bio/background_complete_obs_cels_globot.csv", header = TRUE)
head(background)
```

# environmental correlates

1) Get the max, min, an mean values and add into a dataframe

Temperature
```{r}
temp_depth_max_back <- max(background$temp_depth, na.rm = TRUE)
temp_depth_min_back <- min(background$temp_depth, na.rm = TRUE)
temp_depth_mean_back <- mean(background$temp_depth, na.rm = TRUE)
temp_surface_max_back <- max(background$temp_surface, na.rm = TRUE)
temp_surface_min_back <- min(background$temp_surface, na.rm = TRUE)
temp_surface_mean_back <- mean(background$temp_surface, na.rm = TRUE)
```

Salinity
```{r}
sal_depth_max_back <- max(background$salinity_depth, na.rm = TRUE)
sal_depth_min_back <- min(background$salinity_depth, na.rm = TRUE)
sal_depth_mean_back <- mean(background$salinity_depth, na.rm = TRUE)
sal_surface_max_back <- max(background$salinity_surface, na.rm = TRUE)
sal_surface_min_back <- min(background$salinity_surface, na.rm = TRUE)
sal_surface_mean_back <- mean(background$salinity_surface, na.rm = TRUE)
```

Chl
```{r}
chl_depth_max_back <- max(background$chl_depth, na.rm = TRUE)
chl_depth_min_back <- min(background$chl_depth, na.rm = TRUE)
chl_depth_mean_back <- mean(background$chl_depth, na.rm = TRUE)
chl_surface_max_back <- max(background$chl_surface, na.rm = TRUE)
chl_surface_min_back <- min(background$chl_surface, na.rm = TRUE)
chl_surface_mean_back <- mean(background$chl_surface, na.rm = TRUE)
```

O2
```{r}
o2_depth_max_back <- max(background$o2_depth, na.rm = TRUE)
o2_depth_min_back <- min(background$o2_depth, na.rm = TRUE)
o2_depth_mean_back <- mean(background$o2_depth, na.rm = TRUE)
o2_surface_max_back <- max(background$o2_surface, na.rm = TRUE)
o2_surface_min_back <- min(background$o2_surface, na.rm = TRUE)
o2_surface_mean_back <- mean(background$o2_surface, na.rm = TRUE)
```

MLP
```{r}
mlp_surface_max_back <- max(background$mlp_surface, na.rm = TRUE)
mlp_surface_min_back <- min(background$mlp_surface, na.rm = TRUE)
mlp_surface_mean_back <- mean(background$mlp_surface, na.rm = TRUE)
```

SSH
```{r}
ssh_surface_max_back <- max(background$ssh_surface, na.rm = TRUE)
ssh_surface_min_back <- min(background$ssh_surface, na.rm = TRUE)
ssh_surface_mean_back <- mean(background$ssh_surface, na.rm = TRUE)
```

create matrix
```{r}
tdb <- c(temp_depth_max_back, temp_depth_min_back, temp_depth_mean_back)
tsb <- c(temp_surface_max_back, temp_surface_min_back, temp_surface_mean_back)
sdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
ssb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
cdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
csb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
odb <- c(o2_depth_max_back, o2_depth_min_back, o2_depth_mean_back)
osb <- c(o2_surface_max_back, o2_surface_min_back, o2_surface_mean_back)
mlpb <- c(mlp_surface_max_back, mlp_surface_min_back, mlp_surface_mean_back)
sshb <- c(ssh_surface_max_back, ssh_surface_min_back, ssh_surface_mean_back)
env_stats_back <- rbind(tdb, tsb, sdb, ssb, cdb, csb, odb, osb, mlpb, sshb)
row.names(env_stats_back) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats_back) <- c("Max", "Min", "Mean")
write.csv(env_stats_back, "../output/env/env_correlates_background_basic_stats.csv", row.names = TRUE)
env_stats_back
```

temperature
```{r}
par(mfrow=c(1,2))
plot(background$temp_depth, main = "Background Points Temp at Depth (Various)", ylab = "Kelvin")
plot(background$temp_surface, main = "Background Points Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


salinity
```{r}
par(mfrow=c(1,2))
plot(background$salinity_depth, main = "Background Points Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(background$salinity_surface, main = "Background Points Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

Chlorophyll
```{r}
par(mfrow=c(1,2))
plot(background$chl_depth, main = "Background Points Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$chl_surface, main = "Background Points Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

O2
```{r}
par(mfrow=c(1,2))
plot(background$o2_depth, main = "Background Points Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$o2_surface, main = "Background Points Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

mlp
```{r}
plot(background$mlp_surface, main = "Background Points Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

SSH
```{r}
plot(background$ssh_surface, main = "Background Points Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## frequency plots of env. corr


temperature
```{r}
hist(background$temp_surface, main = "Background Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$temp_dept, main = "Background Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

chl
```{r}
hist(background$chl_surface, main = "Background Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$chl_depth, main = "Background Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

salinity
```{r}
hist(background$salinity_surface, main = "Background Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$salinity_depth, main = "Background Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

oxygen
```{r}
hist(background$o2_surface, main = "Background Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$o2_depth, main = "Background Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

MLP
```{r}
hist(background$mlp, main = "Background Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

ssh
```{r}
hist(background$ssh, main = "Background Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)

temperature


```{r}
boxplot(background$temp_depth ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_depth ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_depth ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_depth ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_depth ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_depth ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_depth ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_depth ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp ~ background$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Year")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh ~ background$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Year")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_surface ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_surface ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_surface ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_surface ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_surface ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_surface ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_surface ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_surface ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## correlations between background points

check to see if there are any correlations in the env. variables for the background points


first subset the env.correlate columns (you don't need everything)

```{r}
background_env <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
background_env_cor <- cor(background_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_env_cor, "../output/env/background_env_corr.csv", row.names = TRUE)
background_corrplot <- corrplot(background_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envcorr.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
```{r}
par(mfrow=c(4,4))
for(i in 1:16){
  plot(density(background_env[,i], na.rm=T), main = names(background_env)[i])
}
```
PUT THE CHUNK OUTPUT BACK TO INLINE


add nafo region and gear type into the mix


first subset the env.correlate columns + bottom_depth (you don't need everything)

```{r}
background_envbotdepth <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
background_envbotdepth_cor <- cor(background_envbotdepth, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_envbotdepth_cor, "../output/env/background_envbotdepth_cor.csv", row.names = TRUE)
background_corrplot <- corrplot(background_envbotdepth_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envbotdepth_cor.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


## correlations between presence points

check to see if there are any correlations in the env. variables

first subset the env.correlate columns (you don't need everything) then use cor to get the correlation values, and then corrplot for a visual

```{r}
pres_env <- subset(presence, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
pres_env_cor <- cor(pres_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(pres_env_cor, "../output/env/pres_env_corr.csv", row.names = TRUE)
pres_corrplot <- corrplot(pres_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/pres_envcorr.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
```{r}
par(mfrow=c(4,4))
for(i in 1:16){
  plot(density(pres_env[,i], na.rm=T), main = names(pres_env)[i])
}
```
PUT THE CHUNK OUTPUT BACK TO INLINE


## density plot with background and presence env. data

Inspired by Merrow 2013 - top paragraph of page 1063 (are the species observations uniformly distributed over the background, or are they skewed)

```{r}
ggplot(pres_env, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/mlp_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/ssh_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


# NAFO Regions

compare the environmental correlates between different NAFO regions

first see which nafo zones were sampled in each year
```{r presence nafo by year}
nafo_by_yr <- with(presence, table(year, nafo_zone))
write.csv(nafo_by_yr, file = "../output/bio/nafozone_by_yr.csv")
nafo_by_yr
```

and by month
```{r prsence nafo by month}
nafo_by_mth <- with(presence, table(nafo_zone, month))
write.csv(nafo_by_mth, file = "../output/bio/nafozone_by_mth.csv")
nafo_by_mth
```

Interesting that there is a point in 1C - this is outside Canadian waters.... anyway
```{r}
onec <-  subset(presence, nafo_zone == "1C")
onec
```

## density plot with background and presence env. data by NAFO region

What I want to see if if there is a marked difference between the env. correlates of the presence points between NAFO regions. This is also to try deal with sampling bias (b/c the whole region is not uniformly sampled in each month, but rather nafo regions have a strong month bias)

first create NAFO-region datasets

```{r presence by nafo}
nafo0a <- subset(presence, nafo_zone == "0A")
nafo0b <- subset(presence, nafo_zone == "0B")
nafo1c <- subset(presence, nafo_zone == "1C")
nafo2g <- subset(presence, nafo_zone == "2G")
nafo2h <- subset(presence, nafo_zone == "2H")
nafo2j <- subset(presence, nafo_zone == "2J")
nafo3k <- subset(presence, nafo_zone == "3K")
nafo3l <- subset(presence, nafo_zone == "3L")
nafo3m <- subset(presence, nafo_zone == "3M")
nafo3n <- subset(presence, nafo_zone == "3N")
nafo3o <- subset(presence, nafo_zone == "3O")
nafo3ps <- subset(presence, nafo_zone == "3Ps")
nafo4r <- subset(presence, nafo_zone == "4R")
nafo4s <- subset(presence, nafo_zone == "4S")
nafo4t <- subset(presence, nafo_zone == "4T")
nafo4vn <- subset(presence, nafo_zone == "4Vn")
nafo4vs <- subset(presence, nafo_zone == "4Vs")
nafo4w <- subset(presence, nafo_zone == "4W")
nafo4x <- subset(presence, nafo_zone == "4X")
nafo5y <- subset(presence, nafo_zone == "5Y")
nafo5ze <- subset(presence, nafo_zone == "5ZE")
nafohudson <- subset(presence, nafo_zone == "HudsonStrait")
```

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
```{r presence by nafo by variable}
ggplot(nafo0a, aes(x = temp_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/temp_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = temp_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/temp_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = chl_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/chl_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = chl_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/chl_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = salinity_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/salinity_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = salinity_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/salinity_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = o2_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = o2_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = mlp_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/mlp_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = ssh_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/ssh_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```



Let's plot the variables by nafo region/year then by month


```{r}
pr98 <- subset(presence, year == "1998")
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
```

```{r}
pr98 <- subset(presence, year == "1998")
pr11 <- subset(presence, year == "2011")
par(mfrow=c(2,1))
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
boxplot(pr11$temp_depth ~ pr11$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
```

ok scrap the nafo region analysis - it is too mixed up with month (so seeing if region or month is a factor that needs to be factored in to the model is not appropriate)

# remap who sampled

Now lets check the number of records and spatial-temporal distribution of the observations by institution code to make sure none are dodgy

first a table of how many observations each instituioncode has
```{r institution code analysis - count}
obs_by_ins <- count(presence, "institutioncode")
obs_by_ins
write.csv(obs_by_ins, file = "../output/bio/samplinginstitutions/no_observations_institutioncode.csv")
```
ok so IMR, NAFO, ICES, & USM have very few entries (1, 1, 1, and 3 respectively)

Lets take a look at the spatial breakdown of these institutions.First all points...

```{r}
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(38, 70), asp = 1, main = "All Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(presence$decimalLongitude, presence$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/all_occurrences.png") #this prints a png of the plot
dev.off() #this turns off the print command
```
Note there is one point up by iceland that you should get rid of (Icelandic population thought to be seperate from Labrador, but it is unclear if this is true or not).

Map the institutioncode == ARC only data...
```{r map obs by institutuion}
ARC_obs <- presence[presence$institutioncode == "ARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Arc Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ARC_obs$decimalLongitude, ARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOCENARC_obs <- presence[presence$institutioncode == "DFOCENARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Central & Arctic Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOCENARC_obs$decimalLongitude, DFOCENARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOCENARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOGulf_obs <- presence[presence$institutioncode == "DFOGulf", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Gulf Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOGulf_obs$decimalLongitude, DFOGulf_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOGulf_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOISDM_obs <- presence[presence$institutioncode == "DFOISDM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO ISDM Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOISDM_obs$decimalLongitude, DFOISDM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOISDM_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOMTMS_obs <- presence[presence$institutioncode == "DFOMTMS", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Maritimes Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOMTMS_obs$decimalLongitude, DFOMTMS_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOMTMS_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFONL_obs <- presence[presence$institutioncode == "DFONL", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Newfouundland & Labrador Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFONL_obs$decimalLongitude, DFONL_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFONL_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOQC_obs <- presence[presence$institutioncode == "DFOQC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Quebec Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOQC_obs$decimalLongitude, DFOQC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOQC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

MaineDMR_obs <- presence[presence$institutioncode == "MaineDMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Maine DMR Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(MaineDMR_obs$decimalLongitude, MaineDMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/MaineDMR_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

NEFSC_obs <- presence[presence$institutioncode == "NEFSC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "NEFSC Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NEFSC_obs$decimalLongitude, NEFSC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/NEFSC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

ROM_obs <- presence[presence$institutioncode == "ROM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Royal Ontario Museum Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ROM_occurrences_all.png") #this prints a png of the plot
```

# check for gear type

what are the unique gear types you have in your presence data, and how many?

```{r observation by cell - total}
gear_count <- count(presence, "gear")
write.csv(gear_count, file = "../output/bio/gear_count.csv")
gear_count
```

so the vast majority are trawls of some type. 

map the gear usage in Arcgis (gear_type_map)

create a table of gear use by month

```{r}
gearby_mth <- with(presence, table(month, gear))
write.csv(gearby_mth, file = "../output/bio/gear_by_mth.csv")
gearby_mth
```

What I want to see if if there is a marked difference between the env. correlates of the presence points between gear type used. This is also to try deal with detection bias between gear type (b/c the whole region is not uniformly sampled by the same gear type)

first create gear datasets

```{r presence by gear}
presence$gear <- as.character(presence$gear)
bottom_trawl <- subset(presence, gear == "bottom_trawl")
bottom_trawl_alfredo_03 <- subset(presence, gear == "bottom_trawl_alfredo_03")
bottom_trawl_campelen_14 <- subset(presence, gear == "bottom_trawl_campelen_14")
bottom_trawl_campelen_1800 <- subset(presence, gear == "bottom_trawl_campelen_1800")
bottom_trawl_campelen_21 <- subset(presence, gear == "bottom_trawl_campelen_21")
bottom_trawl_cosmos_2600 <- subset(presence, gear == "bottom_trawl_cosmos_2600")
bottom_trawl_western_IIA <- subset(presence, gear == "bottom_trawl_western_IIA")
unknown <- subset(presence, gear == "unknown")
vertical_plankton_tow <- subset(presence, gear == "vertical_plankton_tow")
```

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
```{r presence by gear by variable}
ggplot(bottom_trawl, aes(x = temp_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/temp_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = temp_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/temp_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = chl_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/chl_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = chl_depth, colour = gear))  + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/chl_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = salinity_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/salinity_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = salinity_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/salinity_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = o2_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = o2_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = mlp_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/mlp_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = ssh_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/ssh_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```





```{r}
par(mar=c(7,5,1,1))
boxplot(presence$temp_depth ~ presence$gear, ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth by gear type", las = 2)
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

looks like not much difference... what about a kruskal wallace test?

```{r}
kruskal.test(presence$temp_depth ~ presence$gear)
```
Ok so there is a statistically sig difference somewhere in the temp at depth reported by the gear type (Kruskal-Wallis chi-squared = 248.27, df = 7, p-value < 2.2e-16)

To see where the difference(s) are run a Dunn test Zar (2010) states that the Dunn test (in the FSA package) is appropriate for groups with unequal numbers of observations.(Zar, J.H. 2010. Biostatistical Analysis, 5th ed.  Pearson Prentice Hall: Upper Saddle River, NJ.) http://rcompanion.org/rcompanion/d_06.html

SAM WHEN DUNN IS INSTALLED, GO TO RCOMPANIONS.ORG AND LOOK FOR DUNN - CHECK CHROME HISTORY (PROBABLY UNDER KRUSKAL WALLIS) ALSO MAP THE DISTRIBUTION OF THESE GEAR TYPES
```{r}
pairwise.wilcox.test(presence$temp_depth, presence$gear, p.adj='bonferroni', exact=F)
```

dunn test 
```{r}
gear_temp_depthdunn <- dunnTest(presence$temp_depth, presence$gear, list = TRUE)
gear_temp_depthdunn



write.csv(table, "../output/env/gear_temp_depthdunn.csv", row.names = TRUE)
```


#vif

for this you need the joined dataset. this was created under mergig_datasets.src and the file is ../output/bio/presab.csv

```{r}
presab <- read.csv("../output/bio/presab.csv", header = TRUE)
```


```{r}
vif_allpresab <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allpresab, "../output/bio/vif_allpresab.csv", row.names = TRUE)
vif_allpresab
```
interpret - see https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif/96584#96584
To make GVIFs comparable across dimensions, we suggested using GVIF^(1/(2*Df)), where Df is the number of coefficients in the subset (ref fox and motette 1992 in zotero)
or the 2 continuous variables, GVIFˆ(1/(2*Df)) (which is basically the square root of the VIF/GVIF value as DF=1) is the proportional change of the standard error and confidence interval of their coefficients due to the level of collinearity. The GVIFˆ(1/(2*Df)) value of the categorical variable is a similar measure for the reduction in precision of the coefficients' estimation due to collinearity. 
apparently i just need to square GVIF^(1/(2*Df)) and then use the normal VIF "rule of thumb"...

```{r}
vif_allpresab_sq <- read.csv("../output/bio/vif_allpresab.csv", header = TRUE)
vif_allpresab_sq$GVIF2Dfsq <- vif_allpresab_sq$GVIF..1..2.Df..^2
write.csv(vif_allpresab_sq, "../output/bio/vif_allpresab_sq.csv", row.names = TRUE)
vif_allpresab_sq
```

As per SLR suggestion, rerun without gear

```{r vif all but gear}
vif_allbutgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutgear, "../output/bio/vif_allbutgear.csv", row.names = TRUE)
vif_allbutgear
```

As per SLR suggestion, rerun without gear and most highly correlated variables 

```{r vif without gear + high corr}
vif_allbutgearhighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutgearhighlycorr, "../output/bio/vif_allbutgearhighlycorr.csv", row.names = TRUE)
vif_allbutgearhighlycorr
```


As per SLR suggestion, rerun with gear but without most highly correlated variables

```{r vif all but high corr}
vif_allbuthighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter + gear, data = presab))
write.csv(vif_allbuthighlycorr, "../output/bio/vif_allbuthighlycorr.csv", row.names = TRUE)
vif_allbuthighlycorr

vif_allbuthighlycorr_sq <- read.csv("../output/bio/vif_allbuthighlycorr.csv", header = TRUE)
vif_allbuthighlycorr_sq$GVIF2Dfsq <- vif_allbuthighlycorr_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighlycorr_sq, "../output/bio/vif_allbuthighlycorr_sq.csv", row.names = TRUE)
vif_allbuthighlycorr_sq
```

ok remove one variable at a time - leave gear in

remove amo_prev
```{r vif all but amoprev}
vif_allbutamo_prev <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prev, "../output/bio/vif_allbutamo_prev.csv", row.names = TRUE)
vif_allbutamo_prev

vif_allbutamo_prev_sq <- read.csv("../output/bio/vif_allbutamo_prev.csv", header = TRUE)
vif_allbutamo_prev_sq$GVIF2Dfsq <- vif_allbutamo_prev_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutamo_prev_sq, "../output/bio/vif_allbutamo_prev_sq.csv", row.names = TRUE)
vif_allbutamo_prev_sq
```
and leave gear out

remove amo_prev + gear
```{r vif all but amoprev + gear}
vif_allbutamo_prevgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prevgear, "../output/bio/vif_allbutamo_prevgear.csv", row.names = TRUE)
vif_allbutamo_prevgear
```

remove chl_depth
```{r vif all but chldepth}
vif_allbutchl_depth <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depth, "../output/bio/vif_allbutchl_depth.csv", row.names = TRUE)
vif_allbutchl_depth

vif_allbutchl_depth_sq <- read.csv("../output/bio/vif_allbutchl_depth.csv", header = TRUE)
vif_allbutchl_depth_sq$GVIF2Dfsq <- vif_allbutchl_depth_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutchl_depth_sq, "../output/bio/vif_allbutchl_depth_sq.csv", row.names = TRUE)
vif_allbutchl_depth_sq
```

remove chl_depth and gear
```{r vif all but chldepth+gear}
vif_allbutchl_depthgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depthgear, "../output/bio/vif_allbutchl_depthgear.csv", row.names = TRUE)
vif_allbutchl_depthgear
```


remove ssh_surface
```{r vif all but ssh}
vif_allbutssh_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surface, "../output/bio/vif_allbutssh_surface.csv", row.names = TRUE)
vif_allbutssh_surface

vif_allbutssh_surface_sq <- read.csv("../output/bio/vif_allbutssh_surface.csv", header = TRUE)
vif_allbutssh_surface_sq$GVIF2Dfsq <- vif_allbutssh_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutssh_surface_sq, "../output/bio/vif_allbutssh_surface_sq.csv", row.names = TRUE)
vif_allbutssh_surface_sq
```

remove ssh_surface & gear
```{r vif all but ssh+gear}
vif_allbutssh_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surfacegear, "../output/bio/vif_allbutssh_surfacegear.csv", row.names = TRUE)
vif_allbutssh_surfacegear
```

remove o2_surface
```{r vif all but o2surface}
vif_allbuto2_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surface, "../output/bio/vif_allbuto2_surface.csv", row.names = TRUE)
vif_allbuto2_surface

vif_allbuto2_surface_sq <- read.csv("../output/bio/vif_allbuto2_surface.csv", header = TRUE)
vif_allbuto2_surface_sq$GVIF2Dfsq <- vif_allbuto2_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuto2_surface_sq, "../output/bio/vif_allbuto2_surface_sq.csv", row.names = TRUE)
vif_allbuto2_surface_sq
```

remove o2_surface & gear
```{r vif all but o2surface+gear}
vif_allbuto2_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surfacegear, "../output/bio/vif_allbuto2_surfacegear.csv", row.names = TRUE)
vif_allbuto2_surfacegear
```

as per SLR chat jan 23: 

remove salinity_surface only
```{r vif all but salsurface}
vif_allbutsalinitysurface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurface, "../output/bio/vif_allbutsalinitysurface.csv", row.names = TRUE)
vif_allbutsalinitysurface

vif_allbutsalinitysurface_sq <- read.csv("../output/bio/vif_allbutsalinitysurface.csv", header = TRUE)
vif_allbutsalinitysurface_sq$GVIF2Dfsq <- vif_allbutsalinitysurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutsalinitysurface_sq, "../output/bio/vif_allbutsalinitysurface_sq.csv", row.names = TRUE)
vif_allbutsalinitysurface_sq
```
and without gear

```{r vif all but salsurface+gear}
vif_allbutsalinitysurfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurfacegear, "../output/bio/vif_allbutsalinitysurfacegear.csv", row.names = TRUE)
vif_allbutsalinitysurfacegear
```

now remove temp_surface plus highly correlated
```{r vif allbut high corr + temp surface/gear/nogear}
#with gear
vif_allbuthighcorrtempsurface <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurface, "../output/bio/vif_allbuthighcorrtempsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface

vif_allbuthighcorrtempsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsurface.csv", header = TRUE)
vif_allbuthighcorrtempsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsurface_sq, "../output/bio/vif_allbuthighcorrtempsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface_sq

#without gear
vif_allbuthighcorrtempsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurfacegear, "../output/bio/vif_allbuthighcorrtempsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsurfacegear
```

now remove temp_surface plus salinity_surface
```{r vif allbut temp + salinity surface/gear/nogear}
#with gear
vif_allbuttempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurface, "../output/bio/vif_allbuttempsalsurface.csv", row.names = TRUE)
vif_allbuttempsalsurface

vif_allbuttempsalsurface_sq <- read.csv("../output/bio/vif_allbuttempsalsurface.csv", header = TRUE)
vif_allbuttempsalsurface_sq$GVIF2Dfsq <- vif_allbuttempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuttempsalsurface_sq, "../output/bio/vif_allbuttempsalsurface_sq.csv", row.names = TRUE)
vif_allbuttempsalsurface_sq

#without gear
vif_allbuttempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurfacegear, "../output/bio/vif_allbuttempsalsurfacegear.csv", row.names = TRUE)
vif_allbuttempsalsurfacegear
```

now remove temp_surface plus salinity_surface + highly correlated
```{r vif allbut high corr +  temp + salinity surface/gear/nogear}
#with gear
vif_allbuthighcorrtempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurface, "../output/bio/vif_allbuthighcorrtempsalsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface

vif_allbuthighcorrtempsalsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsalsurface.csv", header = TRUE)
vif_allbuthighcorrtempsalsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsalsurface_sq, "../output/bio/vif_allbuthighcorrtempsalsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface_sq

#without gear
vif_allbuthighcorrtempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurfacegear, "../output/bio/vif_allbuthighcorrtempsalsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurfacegear
```

Just out of curiosity, a vif will all but but atmos drivers

```{r}
vif_allbutnaoamo <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear, data = presab))
write.csv(vif_allbutnaoamo, "../output/bio/vif_allbutnaoamo.csv", row.names = TRUE)
vif_allbutnaoamo

vif_allbutnaoamo_sq <- read.csv("../output/bio/vif_allbutnaoamo.csv", header = TRUE)
vif_allbutnaoamo_sq$GVIF2Dfsq <- vif_allbutnaoamo_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutnaoamo_sq, "../output/bio/vif_allbutnaoamo_sq.csv", row.names = TRUE)
vif_allbutnaoamo_sq
```

# check to see how many cells have >1 point per timeslice

this needs to be 3D so you need to create a cell_id_3d
```{r cell_id_3d}
presab$cell_id_3d <- paste(presab$cell_id, presab$depthlayerno, sep="_") #create a new column that is a unique cell_id & depth ID. 
head(presab)
```
and now do one with a timeslice label
```{r cell_id_xyzt}
presab$cell_id_xyzt <- paste(presab$cell_id_3d, presab$year, presab$month, sep="_") #create a new column that is a unique cell_id & depth ID. 
head(presab)
write.csv(presab, "../output/bio/presab_cellid_xyzt.csv", row.names = FALSE)
```


because background points can land in the same xyzt, subset the dataset to be only presences

```{r}
presencedup <- subset(presab, occurrence == "1")
nrow(presencedup)
```


the data is in presencedup$total_cell_obs_xyzt
```{r unique_pointpercell}
unique_pointpercell <- unique(presencedup$total_cell_obs_xyzt)
unique_pointpercell
```
bugger... ok so lets do a frequency table
```{r unique_pointpercell frequencies}
pointpercellcount <- count(celobsmore1, "total_cell_obs_xyzt")
pointpercellcount$no_cells <- pointpercellcount$freq / pointpercellcount$total_cell_obs_xyzt
write.csv(pointpercellcount, file = "../output/bio/pointpercellcount.csv")
pointpercellcount
```

so there are 949 3d timesliced cells that contain more than one occurrence, and 2042 rows. 
- different sampling days
- different coordinates
- risk of duplicate entries (but only if two instituions inputted the same data with slightly different coordinates and different reserch program names - see cell3985_18_2000_7 below as example)

```{r}
cell3985_18_2000_7  <- subset(celobsmore1, cell_id_xyzt == "3985_18_2000_7")
cell3985_18_2000_7 
```

Ideally you only want one observation cell cell_id_xyzt. First see if any are missing depth data

find the column numbers that correspond to the env. data (use fastmatch package as is quicker)

```{r}
fmatch("chl_surface", names(celobsmore1)) #28
fmatch("chl_depth", names(celobsmore1)) #29
fmatch("mlp_surface", names(celobsmore1)) #30
fmatch("o2_surface", names(celobsmore1)) #31
fmatch("o2_depth", names(celobsmore1)) #32
fmatch("salinity_surface", names(celobsmore1)) #33
fmatch("salinity_depth", names(celobsmore1)) #34
fmatch("ssh_surface", names(celobsmore1)) #35
fmatch("temp_surface", names(celobsmore1)) #36
fmatch("temp_depth", names(celobsmore1)) #37
```

then subset any with NA values

```{r celobsmore1_novals}
celobsmore1_novals <-  celobsmore1[!complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_novals) #just to check how many there are - 1095 out of 2042
```

ok and subset with all values

```{r celobsmore1_allval}
celobsmore1_allvals <- celobsmore1[complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_allvals) #947 out of 2042
```

ok so how many unique cells do we have with complete data...

```{r}
pointpercellcount_alldata <- count(celobsmore1_allvals, "total_cell_obs_xyzt")
pointpercellcount_alldata$no_cells <- pointpercellcount_alldata$freq / pointpercellcount_alldata$total_cell_obs_xyzt
write.csv(pointpercellcount_alldata, file = "../output/bio/pointpercellcount_alldata.csv")
pointpercellcount_alldata
```

and see which cell_id_xyzt appear in both celobsmore1_allvals and celobsmore1_novals
```{r}
celobsmore1_allnovals <- inner_join(celobsmore1_allvals, celobsmore1_novals)
celobsmore1_allnovals 
```
None.... which is good. I'd expect observations in the same xyzt to have the same values

so, I can successfully reduce the observations down to one per xyzt

```{r}
fmatch("cell_id_xyzt", names(celobsmore1)) #46
duplicatesbegone <- celobsmore1[!duplicated(celobsmore1[ ,46]), ] #creates a df with all duplicates gone (now 1 obs per cell)
nrow(duplicatesbegone) #949 - good
presabnodup <- subset(presab, total_cell_obs_xyzt == "1") #remove all rows that have more than one obs
nrow(presabnodup) #1549351 - this adds up 
presab <- rbind(presabnodup, duplicatesbegone)
nrow(presab) #1550300 - great!

#recalculate the total_cell_obs_xyzt column

names(presab)[names(presab)=="total_cell_obs_xyzt"] <- "XXtotal_cell_obs_xyzt" #rename col
obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno)
test <- merge(presab, obs_cell_yymm_depth_dup, by.x=c("cell_id", "year", "month", "depthlayerno"), by.y=c("cell_id", "year", "month", "depthlayerno"))
names(test)[names(test)=="n"] <- "total_cell_obs_xyzt" #rename col

write.csv(presab, "../output/bio/presab_cellid_xyzt_nodup.csv")
rm(presabnodup, duplicatesbegone, celobsmore1, celobsmore1_novals, celobsmore1_allnovals, celobsmore1_allvals) #just because they are bigish variables
```



# monthly spearmans correlations and VIF

curious - does correlations/vif alter between months?

First split the dataset into monthly

```{r subset monthly presab}
janpresab <- subset(presab, month == "1")
febpresab <- subset(presab, month == "2")
marpresab <- subset(presab, month == "3")
aprpresab <- subset(presab, month == "4")
maypresab <- subset(presab, month == "5")
junpresab <- subset(presab, month == "6")
julpresab <- subset(presab, month == "7")
augpresab <- subset(presab, month == "8")
seppresab <- subset(presab, month == "9")
octpresab <- subset(presab, month == "10")
novpresab <- subset(presab, month == "11")
decpresab <- subset(presab, month == "12")
```

now get the background points for each month (for spearmans)
```{r monthly background points}
janback <- subset(janpresab, occurrence == "0")
febback <- subset(febpresab, occurrence == "0")
marback <- subset(marpresab, occurrence == "0")
aprback <- subset(aprpresab, occurrence == "0")
mayback <- subset(maypresab, occurrence == "0")
junback <- subset(junpresab, occurrence == "0")
julback <- subset(julpresab, occurrence == "0")
augback <- subset(augpresab, occurrence == "0")
sepback <- subset(seppresab, occurrence == "0")
octback <- subset(octpresab, occurrence == "0")
novback <- subset(novpresab, occurrence == "0")
decback <- subset(decpresab, occurrence == "0")
```

run the correlations on each month's background points

```{r monthly spearman correlations}
janback <- subset(janback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
janback_cor <- cor(janback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(janback_cor, "../output/env/janback_cor.csv", row.names = TRUE)

febback <- subset(febback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
febback_cor <- cor(febback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(febback_cor, "../output/env/febback_cor.csv", row.names = TRUE)

marback <- subset(marback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
marback_cor <- cor(marback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(marback_cor, "../output/env/marback_cor.csv", row.names = TRUE)

aprback <- subset(aprback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
aprback_cor <- cor(aprback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(aprback_cor, "../output/env/aprback_cor.csv", row.names = TRUE)

mayback <- subset(mayback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
mayback_cor <- cor(mayback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(mayback_cor, "../output/env/mayback_cor.csv", row.names = TRUE)

junback <- subset(junback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
junback_cor <- cor(junback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(junback_cor, "../output/env/junback_cor.csv", row.names = TRUE)

julback <- subset(julback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
julback_cor <- cor(julback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(julback_cor, "../output/env/julback_cor.csv", row.names = TRUE)

augback <- subset(augback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
augback_cor <- cor(augback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(augback_cor, "../output/env/augback_cor.csv", row.names = TRUE)

sepback <- subset(sepback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
sepback_cor <- cor(sepback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(sepback_cor, "../output/env/sepback_cor.csv", row.names = TRUE)

octback <- subset(octback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
octback_cor <- cor(octback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(octback_cor, "../output/env/octback_cor.csv", row.names = TRUE)

novback <- subset(novback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
novback_cor <- cor(novback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(novback_cor, "../output/env/novback_cor.csv", row.names = TRUE)

decback <- subset(decback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
decback_cor <- cor(decback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(decback_cor, "../output/env/decback_cor.csv", row.names = TRUE)
```



#maxent?

```{r}
library("raster")
library("dismo")
library("rgeos")
```

